*Load the bank panel, replacing whatever is currently in memory
use "./data_geo.dta", clear

*****************************************************************************
*** Setup: see Kashyap & Stein (2000) and/or Den Haan, Sumner, Yamashiro (2002) for details ***

*Drop non-states: discard rssd9200 == "0" and keep rssd9210 codes strictly between 0 and 57
keep if rssd9200 != "0" & rssd9210 > 0 & rssd9210 < 57

*Drop Delaware and/or South Dakota (see Jayaratne & Strahan 1998 JLE) -- currently disabled
*drop if rssd9210 == 10
*drop if rssd9210 == 46

*Sample period: everything before 1995q1
drop if year_q >= tq(1995q1)

*Positive assets (a missing rcfd2170 compares as > 0 in Stata and is therefore retained)
keep if rcfd2170 > 0

*Commercial banks only
keep if rssd9048 == 200

*Insured: only rssd9424 == 0 is removed; the stricter whitelist below is disabled
*keep if rssd9424 == 1 | rssd9424 == 2 | rssd9424 == 6
keep if rssd9424 != 0

*Merger: retain only observations with no merger code recorded
keep if MERGE_CD == .

*Declare the bank-quarter panel so the lag operator (L.) can be used below
xtset rssd9001 year_q

*Total loans: before 1984q1 rcfd2165 is added to rcfd1400; from 1984q1 onward rcfd1400 is used alone
gen loans2 = cond(year_q < tq(1984q1), rcfd1400 + rcfd2165, rcfd1400)

*Put loans and loan components in real terms (deflator is cpi/100)
gen c_p_i = cpi/100
local real_names r_loans r_ci r_rlest r_con r_ag
local nominal_items loans2 rcfd1600 rcfd1410 rcfd1975 rcfd1590
forvalues i = 1/5 {
    local real : word `i' of `real_names'
    local nominal : word `i' of `nominal_items'
    gen `real' = `nominal'/c_p_i
}

*Log difference of each real series against its own one-quarter lag within bank.
*The result is missing whenever the current or lagged level is missing or non-positive.
local growth_names loans ci re con ag
forvalues i = 1/5 {
    local growth : word `i' of `growth_names'
    local real : word `i' of `real_names'
    gen `growth' = ln(`real') - ln(L.`real')
}

*Drop outliers: per-quarter standard deviation of loan growth, then a 5-sd band around zero.
*Stata treats missing as larger than any number, so the two one-sided drops this replaces
*also removed every observation with missing loan growth (e.g. a bank's first quarter) and
*every observation in a quarter whose sd could not be computed. That is spelled out here.
*NOTE(review): the band is centred on zero, not on the quarterly mean -- confirm this is intended.
bysort year_q: egen sdv = sd(loans)
drop if missing(loans) | missing(sdv) | abs(loans) >= 5*sdv

*Loan component shares: each component as a fraction of total loans (loans2)
local share_stubs ci re con ag
local share_items rcfd1600 rcfd1410 rcfd1975 rcfd1590
forvalues k = 1/4 {
    local stub : word `k' of `share_stubs'
    local item : word `k' of `share_items'
    gen `stub'_share = `item'/loans2
}
*Disabled outlier filters, one per share variable:
*drop if ci_share < 0.05
*drop if re_share < 0.05
*drop if con_share < 0.05
*drop if ag_share < 0.05

*Four Consecutive Quarters of Loan Growth
*tsspell is a user-written command: ssc install tsspell
*It marks spells of consecutive quarters with loans > 0; _seq counts the position within a spell.
tsspell, cond(loans > 0)

*Longest spell per bank = largest _seq reached; keep banks with at least one spell of 4+ quarters
bysort rssd9001: egen maxrun = max(_seq)
keep if maxrun >= 4

*Percentiles: 95th and 99th percentile of total assets within each quarter
foreach q of numlist 95 99 {
    bysort year_q: egen p`q' = pctile(rcfd2170), p(`q')
}

*Generate Quarter/Year/timetrend Series
sort rssd9001 year_q
gen Quarter = quarter(date2)
gen Year = year(date2)
*time = consecutive integer index over the distinct values of year_q
egen time = group(year_q)

*Choose to keep (or drop) year deregulation was implemented:
*missing values of the deregulation indicators are set to 1
foreach dereg of varlist intra inter {
    replace `dereg' = 1 if `dereg' == .
}

*******************************************************************************
*** Bank-level Characteristics ***

* BHC Status: 1 when the bank reports a non-zero holder ID in rssd9379, 0 otherwise.
* In Stata a missing value satisfies "!= 0", so without the !missing() guard every bank with
* a missing rssd9379 would be flagged as a BHC member and later pooled with all other
* missing-ID banks into one artificial holding company. Missing IDs are treated as no BHC.
gen bhc = rssd9379 != 0 & !missing(rssd9379)

* Size (log of total assets)
gen Size = ln(rcfd2170)

* Liquidity ratio
gen Liq = rcfd0010/rcfd2950

* Equity ratio (rcfd3210 over total assets)
gen Cap = rcfd3210/rcfd2170

* Securities: a different set of items is summed before 1984q1 (sec84) than from 1984q1 onward
gen Sec = rcfd0390 + rcfd1350
gen sec84 = rcfd0400 + rcfd0600 + rcfd0900 + rcfd0380 + rcfd1350
replace Sec = sec84 if year_q < tq(1984q1) 


*******************************************************************************
********************************************************************************
*** BHC Status ***

* Observation-level flags: BHC member while inter == 0 (pre) or while inter == 1 (post)
gen prebhc = (bhc == 1 & inter == 0)
gen postbhc = (bhc == 1 & inter == 1)

* Bank-level flags: 1 if the bank ever has the corresponding observation-level flag set
bysort rssd9001: egen affiliated = max(bhc)
by rssd9001: egen pre_bhc = max(prebhc)
by rssd9001: egen post_bhc = max(postbhc)

* Restore panel order
sort rssd9001 year_q

*******************************************************************************
*** BHC Variables ***
*keep if bhc == 1
*keep if affiliated == 1

* Holding-company totals per (rssd9379, quarter): bank count, assets, loans, capital, deposits.
* Observations with bhc == 0 get 0 for every total.
gen ones = 1
sort rssd9379 year_q
local hc_totals hc_banks hc_assets hc_loans hc_cap hc_dep
local hc_sources ones rcfd2170 loans2 rcfd3210 rcfd2200
forvalues j = 1/5 {
    local tot : word `j' of `hc_totals'
    local src : word `j' of `hc_sources'
    * total() is the current name of egen's sum(); both treat missing values as zero
    by rssd9379 year_q: egen `tot' = total(`src')
    replace `tot' = 0 if bhc == 0
}
*by rssd9379 year_q: egen hc_emp = sum(rcfd4150)

* Logs of holding-company assets and capital; an undefined log (zero or negative total) is set to 0
gen lnhc_assets = ln(hc_assets)
gen hc_eq = ln(hc_cap)
foreach lv of varlist lnhc_assets hc_eq {
    replace `lv' = 0 if `lv' == .
}


*** BHC distance fill and bank-to-holding-company ratios ***
* Missing bhc_distance is set to 0 for years up to 1994; the post-1994 carry-forward is disabled
replace bhc_distance = 0 if bhc_distance==. & Year <= 1994
*sum bhc_distance
*replace bhc_distance = bhc_distance[_n-1] if bhc_distance==. & Year > 1994
*sum bhc_distance

sort rssd9001 year_q

* Each ratio is the bank's own item over its holding-company total; observations with bhc == 0 get 1
local ratio_names assetr loansr capr depr
local bank_items rcfd2170 loans2 rcfd3210 rcfd2200
local hc_items hc_assets hc_loans hc_cap hc_dep
forvalues r = 1/4 {
    local ratio : word `r' of `ratio_names'
    local num : word `r' of `bank_items'
    local den : word `r' of `hc_items'
    gen `ratio' = cond(bhc == 0, 1, `num'/`den')
}
*gen empr = rcfd4150/hc_emp

* Clamp the capital ratio to [0, 1].
* NOTE(review): a missing capr (e.g. hc_cap == 0) compares as > 1 and is therefore set to 1 -- confirm intended.
replace capr = cond(capr > 1, 1, max(capr, 0))

sort rssd9001 year_q

*gen linter = L.inter
*drop inter
*gen inter = linter

*collapse (mean) bhc bhc_distance hc_banks lnhc_assets hc_eq assetr capr inter pi_pchg hpi_chg rssd9210 
********************************************************************************

* Detailed summary statistics for banks below the quarterly 95th asset percentile that are
* ever flagged pre_bhc: full sample, then Year < 1986, then Year >= 1986, for each variable.
local small_pre "rcfd2170 < p95 & pre_bhc==1"
foreach v of varlist bhc_distance hc_assets hc_cap assetr capr {
    sum `v' if `small_pre', detail
    sum `v' if `small_pre' & Year < 1986, detail
    sum `v' if `small_pre' & Year >= 1986, detail
}
********************************************************************************